In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from JSAnimation import IPython_display
from ipywidgets import StaticInteract, RangeWidget, RadioWidget
from IPython.display import Audio
from skspeech.synthesis.articulatory.kroger import *

plt.rc('figure', max_open_warning=-1)

def compare(contour, articulators, highlight="tongue", figsize=(8, 6)):
    plt.figure(figsize=figsize)
    ax = plt.subplot(121)
    plot_sagittal(structure, contour, ax=ax, highlight=highlight)
    ax = plt.subplot(122)
    art_contour = SagittalContours.from_articulators(articulators)
    plot_sagittal(structure, art_contour, ax=ax, highlight=highlight)
    
def set_vowel(art, vowel="neutral"):
    if vowel == 'a':
        art.toDors1 = -1000
    elif vowel == 'i':
        art.toDors1 = 1000
        art.toDors2 = 1000
    elif vowel == 'u':
        art.toDors1 = 1000
        art.toDors2 = -1000
        art.lips2 = 1000
    else:
        art.toDors1 = 0
        art.toDors2 = 0

def gaussian(x, mean, var):
    return np.exp(-np.power(x - mean, 2.) / (2 * np.power(var, 2.)))

t = np.arange(0, 805, 5)
ga = Articulators(np.zeros((11, t.size)))
ga.toDors3 = gaussian(t, 120, 60) * 1000
ga.toDors1 = np.array([-2, -6, -12, -20, -30, -42, -55, -69, -84, -100, -117,
                       -135, -154, -173, -193, -213, -234, -255, -277, -299,
                       -321, -343, -366, -389, -412, -435, -459, -483, -507,
                       -531, -555, -579, -601, -620, -638, -654, -668, -681, 
                       -692, -702, -711, -719, -727, -734, -740, -746, -751,
                       -755, -759, -763, -766, -769, -772, -774, -776, -778, 
                       -780, -782, -783, -784, -785, -786, -787, -788, -789,
                       -790, -791, -791, -791, -791, -791, -791, -791, -791, 
                       -791, -791, -791, -791, -791, -791, -791, -791, -791,
                       -791, -791, -791, -791, -791, -789, -786, -781, -774,
                       -766, -757, -747, -736, -724, -711, -697, -683, -668,
                       -653, -637, -621, -604, -587, -570, -553, -535, -517,
                       -499, -481, -462, -443, -424, -405, -386, -367, -348,
                       -329, -310, -291, -271, -251, -231, -211, -191, -171,
                       -153, -137, -123, -110, -99, -89, -80, -72, -64, -57,
                       -51, -45, -40, -36, -32, -28, -25, -22, -19, -17, -15,
                       -13, -11, -9, -8, -7, -6, -5, 0, -5, -5, -5, -5])
ga.toDors2 = np.array([0, -1, -2, -4, -6, -9, -12, -15, -19, -23, -27, -31,
                       -36, -41, -46, -51, -56, -61, -66, -71, -77, -83, -89,
                       -95, -101, -107, -113, -119, -125, -131, -137, -143,
                       -148, -153, -157, -161, -164, -167, -170, -173, -175,
                       -177, -179, -181, -182, -183, -184, -185, -186, -187,
                       -188, -189, -190, -191, -191, -191, -191, -191, -191,
                       -191, -191, -191, -191, -191, -191, -191, -191, -191,
                       -191, -191, -191, -191, -191, -191, -191, -191, -191,
                       -191, -191, -191, -191, -191, -191, -191, -191, -191,
                       -191, -191, -191, -190, -189, -188, -186, -184, -182,
                       -179, -176, -173, -170, -167, -163, -159, -155, -151,
                       -147, -143, -139, -135, -131, -126, -121, -116, -111,
                       -106, -101, -96, -91, -86, -81, -76, -71, -66, -61, -56,
                       -51, -46, -41, -36, -32, -28, -25, -22, -19, -17, -15,
                       -13, -11, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 0, 0,
                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
ga.voFoTen = 600
ga.voFoPos = 800
ga.voFoPos[:, int(t.shape[0] * 0.1): int(t.shape[0] * 0.9)] = 10
upline, lowline = midline(structure, SagittalContours.from_articulators(ga))

Building a simple articulatory synthesizer

Computer Science PhD seminar

Trevor Bekolay, Bernd Kroger, Chris Eliasmith

Goal


speed, interoperability $>$ audio quality

Articulatory speech synthesis

  1. Vocal tract model

  2. Acoustic model

  3. Control model

Vocal tract model: 2D Geometry

[i] $\quad$ [ɑ] $\quad$ [u]

Phonetics

Studies the sounds of human speech.

"Welcome..."

"/ˈwɛl kəm/..."

Vowels

$\leftrightarrow$ Front-Back $\quad\updownarrow$ Closed-Open $\quad\cdot$ Unrounded-Rounded

Vowels in the synthesizer

  1. Extract edge contours from MRI scans of [i] [ɑ] [u] (extreme vowels).

  2. Interpolate between these three to modify the entire vocal tract shape (a toy sketch follows this list).

  • $\leftrightarrow$ [i] = front, [ɑ] [u] = back

  • $\updownarrow$ [ɑ] = open, [i] [u] = closed

  • $\cdot$ [u] = rounded, [i] [ɑ] = unrounded
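
A minimal sketch of step 2, assuming the three parameters are normalized to [0, 1] (hypothetical helper, not the skspeech API; the real interpolation in the Kroger model is more involved):

import numpy as np

def blend_vowel(contours, closed, front, rounded):
    # contours maps 'aa', 'ii', 'uu' to (N, 2) point arrays;
    # closed, front, rounded are normalized to [0, 1].
    w_aa = 1.0 - closed            # open pulls toward [a]
    w_ii = closed * front          # closed + front pulls toward [i]
    w_uu = closed * (1.0 - front)  # closed + back pulls toward [u]
    w_uu += 0.2 * rounded          # rounding nudges toward [u]
    w = np.array([w_aa, w_ii, w_uu])
    w /= w.sum()
    return w[0] * contours['aa'] + w[1] * contours['ii'] + w[2] * contours['uu']

# Toy usage with random stand-in "contours":
refs = {k: np.random.rand(3, 2) for k in ('aa', 'ii', 'uu')}
print(blend_vowel(refs, closed=1.0, front=1.0, rounded=0.0))  # ~[i]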

In [2]:
print type(structure)
print
print structure.__doc__
<class 'skspeech.synthesis.articulatory.kroger.SagittalStructure'>

SagittalStructure describes the static portions of the model.

    The points describe the x, y locations of bones, cartilage, etc.
    They are determined through analysis of just one MRI image
    (vowel a) and are assumed to stay constant.
    

In [3]:
structure.data.shape
Out[3]:
(173, 1, 2)
In [4]:
structure.labels
Out[4]:
['hw1',
 'hw2',
 'hw3',
 'hw4',
 'hw5',
 'hw6',
 'hw7',
 'hw8',
 'hw9',
 'hw10',
 'hw11',
 'sc1',
 'sc2',
 'sc3',
 'sc4',
 'ok1',
 'ok2',
 'ok3',
 'na']
In [5]:
print type(SagittalContours.aa)
print
print SagittalContours.aa.__doc__
<class 'skspeech.synthesis.articulatory.kroger.SagittalContours'>

SagittalContours describes the dynamic portions of the model.

    The points describe the x, y locations of soft tissues, teeth, etc.
    They are determined through analysis of various MRI images,
    or more commonly, through interpolation between many MRI images.
    

In [6]:
SagittalContours.aa.data.shape
Out[6]:
(164, 1, 2)
In [7]:
SagittalContours.labels
Out[7]:
['jaw1',
 'jaw2',
 'jaw3',
 'larynx1',
 'larynx2',
 'lips1',
 'lips2',
 'velum',
 'tongue']
In [8]:
art = Articulators(np.zeros(11))
print type(art)
print
print art.__doc__
<class 'skspeech.synthesis.articulatory.kroger.Articulators'>

The 11 parameters that generate an audio signal.

    These parameters are used to set the vocalic context and
    consonantal constriction of the vocal tract model,
    and are used when generating an audio signal.
    

In [9]:
art.data.shape
Out[9]:
(11, 1)
In [10]:
Articulators.labels
Out[10]:
['toDors2',
 'toDors1',
 'toDors3',
 'lips1',
 'lips2',
 'toTip1',
 'toTip2',
 'velum',
 'voFoPos',
 'voFoTen',
 'luPres']
In [11]:
help(SagittalContours.from_articulators)
Help on method from_articulators in module skspeech.synthesis.articulatory.kroger:

from_articulators(cls, art) method of __builtin__.type instance
    Returns a SagittalContour given an Articulators instance.


In [12]:
art = Articulators(np.zeros(11))
art.toDors1 = 1000  # Closed
art.toDors2 = 1000  # Front
# Unrounded by default
compare(SagittalContours.ii, art)
In [13]:
art = Articulators(np.zeros(11))
art.toDors1 = -1000  # Open
# Front - Back doesn't matter when open
# Unrounded by default
compare(SagittalContours.aa, art)
In [14]:
art = Articulators(np.zeros(11))
art.toDors1 = 1000  # Closed
art.toDors2 = -1000  # Back
art.lips2 = 1000  # Rounded
compare(SagittalContours.uu, art)
In [15]:
def plot(closed, front, rounded):
    fig = plt.figure(figsize=(5,5))
    art = Articulators(np.zeros(11))
    art.toDors1 = closed
    art.toDors2 = front
    art.lips2 = rounded
    plot_sagittal(structure, SagittalContours.from_articulators(art), ax=plt.gca())
    return fig

StaticInteract(plot,
               closed=RangeWidget(-1000, 1000, 500),
               front=RangeWidget(-1000, 1000, 500),
               rounded=RangeWidget(0, 1000, 250))
Out[15]:
[interactive sagittal plot: sliders for closed, front, rounded]

Consonants

Consonants in the synthesizer

  1. Extract edge contours from MRI scans of various consonants.

  2. Interpolate between these and the vocalic state in local portions of the vocal tract (see the sketch after this list).

  • Lips: vocalic $\leftrightarrow$ closed

  • Tongue tip: vocalic $\leftrightarrow$ up

  • Tongue dorsum: vocalic $\leftrightarrow$ up

  • Velum: up $\leftrightarrow$ down
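
A minimal sketch of that local blending, with hypothetical names (the real model drives it through the Articulators toTip/toDors/lips/velum parameters shown above):

import numpy as np

def blend_constriction(vowel_contour, consonant_contour, region, strength):
    # vowel_contour, consonant_contour: (N, 2) point arrays.
    # region: boolean mask selecting the affected points (e.g. the lips).
    # strength: 0 = fully vocalic, 1 = fully constricted.
    out = vowel_contour.copy()
    out[region] = ((1.0 - strength) * vowel_contour[region]
                   + strength * consonant_contour[region])
    return out

# Toy usage: move the "lip" points (first 5 of 20) halfway toward the target.
vowel = np.random.rand(20, 2)
target = np.random.rand(20, 2)
lips = np.zeros(20, dtype=bool)
lips[:5] = True
blended = blend_constriction(vowel, target, lips, strength=0.5)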

In [16]:
# Lips
plt.figure(figsize=(5, 5))
points = 100
art = Articulators(np.zeros((11, points)))
art.lips1 = np.linspace(0, 1000, points)
set_vowel(art, 'a')
animate_sagittal(structure, SagittalContours.from_articulators(art), highlight=('lips1', 'lips2'))
Out[16]:


[sagittal animation: lip closure]
In [17]:
# Tongue tip
plt.figure(figsize=(5, 5))
points = 100
art = Articulators(np.zeros((11, points)))
art.toTip1 = np.linspace(0, 1000, points)
set_vowel(art, 'a')
animate_sagittal(structure, SagittalContours.from_articulators(art))
Out[17]:


[sagittal animation: tongue tip raising]
In [18]:
# Tongue dorsum
plt.figure(figsize=(5, 5))
points = 100
art = Articulators(np.zeros((11, points)))
art.toDors3 = np.linspace(0, 1000, points)
set_vowel(art, 'a')
animate_sagittal(structure, SagittalContours.from_articulators(art))
Out[18]:


[sagittal animation: tongue dorsum raising]
In [19]:
# Velum
plt.figure(figsize=(5, 5))
points = 100
art = Articulators(np.zeros((11, points)))
art.velum = np.linspace(0, 1000, points)
set_vowel(art, 'a')
animate_sagittal(structure, SagittalContours.from_articulators(art), highlight='velum')
Out[19]:


[sagittal animation: velum movement]

Acoustic model:
reflection-type line analog

  • Liljencrants, Johan. Speech synthesis with a reflection-type line analog. PhD Thesis, 1985.
  • Veldhuis, Raymond. "A computationally efficient alternative for the Liljencrants–Fant model and its perceptual evaluation." Journal of the Acoustical Society of America 103.1 (1998): 566-571.

Step 1: Calculate area function from 2D geometry

  1. Segment vocal tract into $n$ sections.
  2. Calculate the area of each section.
In [20]:
# Generate midline
from collections import namedtuple
from matplotlib import animation
from shapely.geometry import Point, LineString

def longest_line(mls):
    if isinstance(mls, LineString):
        return mls
    guess = list(mls)[0]
    for line in list(mls):
        if line.length > guess.length:
            guess = line
    return guess

def midline_f(uls, lls):
    def midline(x):
        upt = uls.interpolate(x, normalized=True)
        lpt = lls.interpolate(1.-x, normalized=True)
        return (upt.x + lpt.x) * 0.5, (upt.y + lpt.y) * 0.5
    return lambda x: np.array(np.vectorize(midline)(x))

Lines = namedtuple('Lines', ['uls', 'lls', 's_uls', 's_lls', 'ml'])
shift = 75000
n_area_points = 21
lines = []

for i in xrange(upline.shape[1]):
    uls = LineString(upline[:,i,:])
    lls = LineString(lowline[:,i,:])
    shifted_uls = longest_line(uls.parallel_offset(shift, 'left'))
    shifted_lls = longest_line(lls.parallel_offset(shift, 'right'))
    shifted_uls = longest_line(shifted_uls.parallel_offset(shift, 'right'))
    shifted_lls = longest_line(shifted_lls.parallel_offset(shift, 'right'))
    x = np.linspace(1-0.01, 0+0.01, n_area_points)
    mid_f = midline_f(shifted_uls, shifted_lls)
    ml = mid_f(x)
    lines.append(Lines(uls, lls, shifted_uls, shifted_lls, ml))
In [21]:
plt.figure(figsize=(7,6)); plt.gca().set_aspect(1.0); plt.xticks(()); plt.yticks(())
start, = plt.plot([lines[0].ml[0, 0], lines[0].uls.xy[0][0], lines[0].lls.xy[0][0]],
                  [lines[0].ml[1, 0], lines[0].uls.xy[1][0], lines[0].lls.xy[1][0]], 'o', c='k')
u_line, = plt.plot(lines[0].uls.xy[0], lines[0].uls.xy[1], c='b', lw=2)
l_line, = plt.plot(lines[0].lls.xy[0], lines[0].lls.xy[1], c='b', lw=2)
su_line, = plt.plot(lines[0].s_uls.xy[0], lines[0].s_uls.xy[1], c='r', lw=2)
sl_line, = plt.plot(lines[0].s_lls.xy[0], lines[0].s_lls.xy[1], c='r', lw=2)
ml_line, = plt.plot(lines[0].ml[0], lines[0].ml[1], c='k', lw=1)
def update(i):
    start.set_data([lines[i].ml[0, 0], lines[i].uls.xy[0][0], lines[i].lls.xy[0][0]],
                   [lines[i].ml[1, 0], lines[i].uls.xy[1][0], lines[i].lls.xy[1][0]])
    u_line.set_data(lines[i].uls.xy[0], lines[i].uls.xy[1])
    l_line.set_data(lines[i].lls.xy[0], lines[i].lls.xy[1])
    su_line.set_data(lines[i].s_uls.xy[0], lines[i].s_uls.xy[1])
    sl_line.set_data(lines[i].s_lls.xy[0], lines[i].s_lls.xy[1])
    ml_line.set_data(lines[i].ml[0], lines[i].ml[1])
    return start, u_line, l_line, su_line, sl_line, ml_line
animation.FuncAnimation(plt.gcf(), update, frames=len(lines), blit=True)
Out[21]:


[animation: upper/lower contours, offset contours, and midline over time]
In [22]:
# Generate area function
from shapely.geometry import Polygon, MultiPoint
from descartes.patch import PolygonPatch

def line_between(line, perp_line1, perp_line2):
    coords = list(line.coords)
    dists = sorted([line.project(pl.intersection(line)) for pl in (perp_line1, perp_line2)])
    lpt, upt = line.interpolate(dists[0]), line.interpolate(dists[1])
    for i, p in enumerate(coords):
        pd = line.project(Point(p))
        if pd >= dists[0]:
            lix = i
            break
    else:
        lix = None
    for i, p in enumerate(reversed(coords)):
        pd = line.project(Point(p))
        if pd <= dists[1]:
            uix = len(coords) - i
            break
    else:
        uix = None
    return LineString([(lpt.x, lpt.y)] + coords[lix: uix] + [(upt.x, upt.y)])

def perp_lines(ml, scale=2.6):
    pls = []
    for (x1, y1), (x2, y2) in zip(ml.T[:-1], ml.T[1:]):
        mid = Point((x1 + x2) * 0.5, (y1 + y2) * 0.5)
        dx, dy = x2 - x1, y2 - y1
        perp_line = LineString([(mid.x - scale * dy, mid.y + scale * dx),
                                (mid.x + scale * dy, mid.y - scale * dx)])
        pls.append(perp_line)
    return pls

def polygons(perp_lines, lls, uls, scale=2.6):
    p = []
    for p1, p2 in zip(perp_lines[:-1], perp_lines[1:]):
        cut_lls = line_between(lls, p1, p2)
        cut_uls = line_between(uls, p1, p2)
        p.append(Polygon(list(cut_lls.coords) + list(p1.coords) + list(cut_uls.coords) + list(p2.coords)))
    return p

def area_f(polys):
    return np.asarray([p.area / 1e10 for p in polys])  # NB: Figure out the right factor here!!

uls, lls, s_uls, s_lls, ml = lines[0]
pls = perp_lines(ml)
polys = polygons(pls, s_uls, s_lls)
area_0 = area_f(polys)
In [23]:
plt.figure(figsize=(15,5))
plt.subplot(131); plt.xticks(()); plt.yticks(()); plt.gca().set_aspect(1.0)
plt.plot(uls.xy[0], uls.xy[1], c='b', lw=2)
plt.plot(lls.xy[0], lls.xy[1], c='b', lw=2)
plt.plot(ml[0], ml[1], c='k', lw=1)
for (x1, y1), (x2, y2) in zip(ml.T[:-1], ml.T[1:]):
    plt.plot((x1 + x2) * 0.5, (y1 + y2) * 0.5, 'o', c='k')
for pl in pls:
    lpt = pl.intersection(s_lls)
    upt = pl.intersection(s_uls)
    plt.plot([lpt.x, upt.x], [lpt.y, upt.y], '-o', lw=2, c='r')
plt.subplot(132); plt.xticks(()); plt.yticks(()); plt.gca().set_aspect(1.0)
plt.plot(ml[0], ml[1], c='k', lw=1)
for poly in polys:
    plt.gca().add_patch(PolygonPatch(poly))
plt.xlim(plt.xlim()[0], plt.xlim()[1] * 1.1)
plt.ylim(plt.ylim()[0], plt.ylim()[1] * 1.1)
plt.subplot(133)
plt.bar(range(len(area_0)), area_0);
In [24]:
all_areas = []
for uls, lls, s_uls, s_lls, ml in lines:
    polys = polygons(perp_lines(ml), s_uls, s_lls)
    all_areas.append(area_f(polys))

Step 2: Glottal signal

Vocal folds oscillate quickly (roughly 100–400 Hz) during voiced phonemes to produce sound.

Model: send a pulse of "current" through the tubes for each oscillation.
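
Reading the pulse shape directly off the next cell (a polynomial-plus-exponential pulse in the spirit of the Veldhuis reference above; $A$, $T_p$, $T_x$, $T_e$, $T_a$, $T_0$ correspond to glottAmp, glottTpS, glottTxS, glottTeS, glottTaS, glottT0S), each period is approximately

$$U(t) = A\, t^2 \left(t^2 - \tfrac{4}{3}(T_p + T_x)\, t + 2\, T_p T_x\right), \qquad 0 \le t \le T_e,$$

followed by an exponential return phase over $T_e < t \le T_0$ with time constant $T_a$, shifted so the pulse ends at zero. The windowed derivative of this pulse is what excites the tract.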

In [25]:
# Use area function to create audio signal

# Part 1: glottal signal
def glottal_sig(t, articulators):
    voFoTen = articulators.voFoTen[0]
    voFoPos = articulators.voFoPos[0]
    sampfac = 20000. / 1000.  # 20 audio samples per articulatory frame (frame index -> sample index at 20 kHz)
    glottF0 = voFoTen[0] * 0.2
    assert glottF0 < float('inf')
    glottT0S = 1000.0 / glottF0
    glottT0 = int(sampfac * glottT0S)
    glottAmp = -50.0
    glottTp = 0.4 * glottT0
    glottTe = int(0.55 * glottT0)
    glottTa = 0.02 * glottT0
    glottTpS = 0.4 * glottT0S
    glottTeS = 0.55 * glottT0S
    glottTaS = 0.005 * glottT0S
    fac01S = (glottT0S - glottTeS) / glottTaS
    glottDS = 1.0 - fac01S / (np.exp(fac01S) - 1.0)
    glottTxS = glottTeS * (1.0 - glottTeS * (0.5 * glottTeS-glottTpS)
                           / (glottTeS * (2.0 * glottTeS - 3.0 * glottTpS)
                              + 6.0 * glottTaS * (glottTeS - glottTpS) * glottDS))
    glottPulse = np.zeros(glottT0)
    iS = np.arange(glottTe + 1) / sampfac
    glottPulse[:glottTe+1] = glottAmp * iS * iS * (iS * iS - (4.0 / 3.0) * iS * (glottTpS + glottTxS) + 2.0 * glottTpS * glottTxS)
    #
    iS = np.arange(glottTe, glottT0) / sampfac
    fac02S = glottPulse[glottTe+1]
    fac03S = glottTaS * 4.0 * glottAmp * iS[0] * (glottTpS - iS[0]) * (glottTxS - iS[0])
    fac04S = (iS[1:] - glottTeS) / glottTaS
    fac05S = (glottT0S - glottTeS) / glottTaS
    glottPulse[glottTe+1:glottT0] =  fac02S + fac03S * (1.0 - np.exp(-fac04S) - fac04S * np.exp(-fac05S)
                                                        ) / (1.0 - np.exp(-fac05S))
    #
    glottPulse[glottTe+1:glottT0] -= glottPulse[-1]
    #
    glottSig = np.zeros(int(t.shape[0] * sampfac))  # sample in between times
    phobeg = np.where(voFoPos < 20)[0][0]
    phoend = np.where(voFoPos[phobeg:] > 20)[0][0] + phobeg
    phobeg *= int(sampfac)
    phoend *= int(sampfac)
    #
    pulseBeg = np.arange(phobeg, phoend, glottT0)
    pulseEnd = pulseBeg + glottT0
    pulseMaxExcit = pulseBeg + glottTe
    # Paste glottPulse at each pulseBeg
    for pulse in pulseBeg:
        glottSig[pulse:pulse + glottT0] += glottPulse
    #
    glott2Sig = np.diff(glottSig) * 10.0  # Not sure why * 10.0...
    kPulse = t[np.asarray(pulseEnd // sampfac, dtype=int)]
    
    return (glottSig, glott2Sig, pulseBeg, glottTe, glottT0,
            np.hstack([glottPulse, np.ones(glottTe * 2 - glottPulse.size) * glottPulse[-1]])) # extended glottPulse
In [26]:
gs, gsdiff, pb, te, t0, glottPulse = glottal_sig(t, ga)
plt.figure(figsize=(12, 6))
plt.subplot(121)
plt.plot(glottPulse); plt.plot(np.diff(glottPulse))
plt.xlim(right=150); plt.xticks(()), plt.yticks(())
plt.subplot(122)
plt.plot(gs); plt.plot(gsdiff)
plt.xlim(right=3200); plt.xticks(()), plt.yticks(());

Step 3: Wave propagation

Simulate the "current" propagating through tube sections of differing impedance.
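
The key update inside tract_pulse (cell In [29], further below) is a Kelly–Lochbaum-style scattering step between adjacent tube sections with areas $A_i$. Ignoring the wall-vibration, radiation, branching, and damping terms also present in that cell, each junction computes

$$\mu_i = \frac{A_{i+1} - A_i}{A_{i+1} + A_i}, \qquad p^{+}_{i+1} = p^{+}_i + \mu_i\,(p^{+}_i + p^{-}_{i+1}), \qquad p^{-}_i = p^{-}_{i+1} - \mu_i\,(p^{+}_i + p^{-}_{i+1}),$$

where $p^{+}$ and $p^{-}$ are the forward- and backward-travelling partial waves (plus and minus in the code) and the glottal pulse is injected as $p^{+}_1$ at every sample.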



In [27]:
import os
Audio(os.path.expanduser('~/Dropbox/bjk_synth/__datacache__/mouth.wav'))
Out[27]:

Control model: gestures

Each phoneme is manually timed and mapped to a set of gestures.

Each gesture is mapped to a set of desired articulator states.
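
A hedged sketch of what such a gesture score might look like as a plain data structure (hypothetical layout and target values; the parameter names come from Articulators.labels above, and the timing is invented for illustration):

# Hypothetical gesture score for the start of "welcome": each phoneme is
# hand-timed and maps to target values for a few Articulators parameters.
gesture_score = [
    {'phoneme': 'w',  'start_ms': 0,   'end_ms': 120,
     'gestures': {'lips2': 1000, 'toDors2': -1000}},   # rounded, back
    {'phoneme': 'eh', 'start_ms': 120, 'end_ms': 260,
     'gestures': {'toDors1': -400, 'toDors2': 600}},   # mid-open, front
    {'phoneme': 'l',  'start_ms': 260, 'end_ms': 340,
     'gestures': {'toTip1': 1000}},                    # tongue tip up
]

def articulator_targets(score, t_ms):
    # Collect the desired articulator settings active at time t_ms.
    targets = {}
    for entry in score:
        if entry['start_ms'] <= t_ms < entry['end_ms']:
            targets.update(entry['gestures'])
    return targets

print(articulator_targets(gesture_score, 150))  # targets active during the vowel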

Summary

Articulatory synthesizers are made up of a vocal tract model, acoustic model, and control model.

I showed you a simple 2D geometrical vocal tract model, and a simple reflection-type line analog acoustic model.

Next steps: finish the acoustic model, implement a neural control model.

In [28]:
# Part 2: window signal
def window_sig(glottTe, glottT0, glottLen):
    winSig = np.zeros(glottLen)
    winSig[:glottTe] = 0.5 + 0.5 * np.cos((glottTe - np.arange(glottTe)) * np.pi / glottTe)
    winSig[glottTe:glottT0] = 1.0
    winSig[glottT0:] = 0.5 + 0.5 * np.cos((np.arange(glottT0, glottLen) - glottT0) * np.pi / (glottLen - glottT0))
    return winSig

gs, gsdiff, pb, te, t0, glottPulse = glottal_sig(t, ga)
ws = window_sig(te, t0, te * 2)
plt.plot(ws, lw=2, c='k')
plt.twinx()
plt.plot(glottPulse, lw=2)
plt.plot(glottPulse * ws, lw=2)
Out[28]:
[<matplotlib.lines.Line2D at 0x1084ddb10>]
In [29]:
# Part 3: tract pulse

def mue_f(areas):
    ari = np.hstack([[5, 0.5], areas])
    mue = np.diff(ari)
    mue /= (ari[:-1] + ari[1:])
    mue[np.isinf(mue)] = 0.0
    return ari, mue

def tract_pulse(areas, glottSigEx, pulseDur):
    Rho = 1.14e-3          # air density (CGS units)
    Visc = 1.86e-4         # air viscosity (CGS units)
    vSchall = 35000.       # speed of sound (cm/s)
    v2Schall = 2.0 * 35000.0
    tSamp = 1.0 / 20000.0  # sampling period (20 kHz)
    ## Additional loss factors (not used in this simplified loop)
    dWabFac = 0.01
    dWabNasFac = 0.016956
    dVisFac = 0.005
    ## Wall vibrations
    kWal = 255.0
    oMinE = 1.0 - 50. * 2. * np.pi * tSamp
    ## Laminar dc-flow loss (not used below)
    dLamFac = 4.0 * np.pi * Visc / (Rho * vSchall)
    ## Jet losses (not used below)
    dJetFac = 0.5 / vSchall
    ###################################################################
    ulipalt = 0.0
    plusalt = 0.0
    minusalt = 0.0
    u2altWal = 0.0
    p2altWal = 0.0
    u1altWal = 0.0
    p1altWal = 0.0
    mouthSigEx = np.zeros(glottSigEx.shape[0])

    ari, mue = mue_f(areas)

    dampFac = 0.99
    
    plus = np.zeros_like(ari)
    minus = np.zeros_like(ari)
    mSig = 0.0
    j2 = 0
    j2end = 250
    ari1beg = 0.5
    ari1max = 5.0 
    ariStep = (ari1max-ari1beg) / j2end
    a = 0.0779 + 0.2373 * np.sqrt(areas[-1])
    b = -0.8430 + 0.3062 * np.sqrt(areas[-1])

    for j in range(glottSigEx.shape[0]):
        if j > pulseDur:
            j2 += 1
            ari[1] = min(ari1beg + ariStep * j2, ari1max)
            mue[1] = (ari[2] - ari[1]) / (ari[2] + ari[1])

        # Tract:
        plus[1] = glottSigEx[j]

        for i in range(1, len(areas) - 1, 2):
            if i == 5:
                pWal = plus[i] + minus[i] * Rho * vSchall / ari[i]
                pWal = (pWal - p1altWal) / 20. + p1altWal
                uWal = (p1altWal - p2altWal) / kWal + oMinE * (2.0 * u1altWal - oMinE * u2altWal)
                u2altWal = u1altWal
                p2altWal = p1altWal
                u1altWal = uWal
                p1altWal = pWal
            dum = dampFac * mue[i] * (plus[i] + minus[i + 1])
            plus[i + 1] = plus[i] + dum
            minus[i] = minus[i + 1] - dum
            if i == 5:
                plus[i + 1] -= (1 + mue[i]) * uWal
                minus[i] -= (1 - mue[i]) * uWal

        for i in range(2, len(areas) - 1, 2):
            dum = mue[i] * (plus[i] + minus[i + 1])
            minus[i] = minus[i + 1] - dum
            if i == 8:
                plus[i + 1] = 0.8 * (plus[i] + dum)
                plus[i + 3] = 0.2 * (plus[i] + dum)
            else:
                plus[i + 1] = plus[i] + dum

        ulip = ((a + b) * ulipalt + 2. * plus[-1] - 2.0 * b * plusalt) / (a + 1.0)
        minus[-1] = ((a + b) * minusalt + (a - 1.0) * plus[-1] + (b - a) * plusalt) / (a + 1.0)
        mouthSigEx[j] = (ulip - ulipalt) * 10.0 + uWal * 0.1
        ulipalt = ulip
        plusalt = plus[-1]
        minusalt = minus[-1]

    return mouthSigEx

gs, gsdiff, pb, te, t0, pulse = glottal_sig(t, ga)
ws = window_sig(te, t0, te * 2)
t_pulse = tract_pulse(area_0, pulse, t0)
wt_pulse = tract_pulse(area_0, pulse * ws, t0)
plt.plot(t_pulse)
plt.plot(wt_pulse)
Out[29]:
[<matplotlib.lines.Line2D at 0x106241750>]
In [30]:
gs, gsdiff, pb, te, t0, glottPulse = glottal_sig(t, ga)
ws = window_sig(te, t0, te * 2)
w_pulse = ws[:-1] * np.diff(glottPulse)

audio = np.array(gs) * 0.001

for pix in pb:
    # 20 = sampfac in glottal_sig
    audio[pix: pix+te*2-1] += tract_pulse(all_areas[pix / 20], w_pulse, t0)

plt.plot(audio)
Out[30]:
[<matplotlib.lines.Line2D at 0x10850e410>]
In [31]:
from IPython.display import Audio
Audio(audio, rate=20000)
Out[31]: